from vectorDB import MyVectorDBConnector
from rag import get_embeddings,get_completion
from extract_pdf import extract_text_from_pdf
from ragbot import RAG_Bot
from splitText import split_text
"""
文件分割的粒度决定了 RAG 的精度
"""
# Demo script: index two PDF pages into a vector store, run one retrieval
# query against it, then ask the RAG bot the same question.

# Vector store for this demo; `get_embeddings` is handed to the connector
# (presumably as its embedding function -- confirm in vectorDB).
# NOTE(review): the collection name is spelled "spliit". It is left unchanged
# because a different name would presumably address a different collection;
# confirm before renaming.
vector_db = MyVectorDBConnector("demo_text_spliit", get_embeddings)

# Extract text from pages 0 and 1 of the PDF, passing min_line_length=10.
page = extract_text_from_pdf(
    "RAG/rzf.pdf",
    page_numbers=[0, 1],
    min_line_length=10,
)

# Split the extracted text into chunks. 300 and 100 are passed positionally
# (presumably chunk size and overlap -- confirm against splitText).
chunks = split_text(page, 300, 100)
print(chunks)
vector_db.add_document(chunks)

bot = RAG_Bot(vector_db, llm_api=get_completion)

user_query = '任正非去过美国？'

# Query the vector store directly and print what it retrieved, so the
# retrieval step can be inspected separately from the bot's answer.
# The second argument is presumably the number of results to return.
search_result = vector_db.search(user_query, 2)
for doc in search_result['documents'][0]:
    print(doc + '\n')

print("=====huifu-----")
# Print the bot's answer under the header instead of discarding chat()'s
# return value.
# NOTE(review): if RAG_Bot.chat already prints its answer itself, drop the
# print below to avoid showing the reply twice.
reply = bot.chat(user_query)
if reply is not None:
    print(reply)